#Load Libraries
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(purrr)
library(tibble)
library(stringr)
library(forcats)
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:dplyr':
##
## intersect, setdiff, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(reshape)
##
## Attaching package: 'reshape'
## The following objects are masked from 'package:reshape2':
##
## colsplit, melt, recast
## The following object is masked from 'package:lubridate':
##
## stamp
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
library(network)
## network: Classes for Relational Data
## Version 1.16.0 created on 2019-11-30.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
library(tidygraph)
##
## Attaching package: 'tidygraph'
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
library(ggraph)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(visNetwork)
library(networkD3)
library(here)
## here() starts at /Users/abigailhorn/Documents/GitHub/PM566
# Directory with the lab's local input files, resolved relative to the project
# root by here(). The trailing slash matters: file names are appended with
# paste0() below.
lab.dir <- here("static/slides/11-interactive-viz/lab/")
#Load data
# Lookup table relating each `place` to its City, SPA, SPA.Name and Clean.Name
# (see the str() output below).
mapping <- read.csv(paste0(lab.dir,"Covid research - SPA Mapping.csv"))
#case_data <- read.csv("latimes-place-totals.csv")
# Place-level case totals read straight from GitHub. The URL literal must not
# end in whitespace: a trailing space is not part of the resource path and can
# make the request fail depending on the download backend.
case_data <- read.csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv")
# Keep only the rows for Los Angeles county.
case_data <- subset(case_data, county == "Los Angeles")
str(mapping)
## 'data.frame': 782 obs. of 5 variables:
## $ City : Factor w/ 377 levels "","Acton","Adams-Normandie",..: 2 2 5 5 9 9 12 12 36 36 ...
## $ place : Factor w/ 711 levels "- Under Investigation",..: 2 545 5 546 9 548 12 659 36 659 ...
## $ SPA : Factor w/ 12 levels "","1","2","3",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ SPA.Name : Factor w/ 11 levels "","Antelope Valley",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Clean.Name: Factor w/ 344 levels "","Acton","Adams-Normandie",..: 2 2 5 5 9 9 12 12 30 30 ...
str(case_data)
## 'data.frame': 65601 obs. of 8 variables:
## $ date : Factor w/ 220 levels "2020-03-16","2020-03-17",..: 219 219 219 219 219 219 219 219 219 219 ...
## $ county : Factor w/ 49 levels "Alameda","Amador",..: 16 16 16 16 16 16 16 16 16 16 ...
## $ fips : int 37 37 37 37 37 37 37 37 37 37 ...
## $ place : Factor w/ 1242 levels "90755: Long Beach",..: 193 194 196 197 201 205 207 211 213 214 ...
## $ confirmed_cases: int 78 292 202 30 1383 394 762 19 2 78 ...
## $ note : Factor w/ 9 levels "","1 to 4","1 to 5",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ x : num -118 -118 -119 -118 -118 ...
## $ y : num 34.5 34 34.2 34.5 34.1 ...
#Fix dates
# Convert the date column to class Date so it can be sorted and subtracted.
case_data$date <- as.Date(case_data$date)
#Merge datasets
# Coerce the join key to character on both sides so the merge matches on the
# text of `place` rather than on factor codes.
case_data$place <- as.character(case_data$place)
mapping$place <- as.character(mapping$place)
# Expose the cleaned city label under the name City.Name and drop the old column.
mapping$City.Name <- mapping$Clean.Name
mapping$Clean.Name <- NULL
# Full outer join on `place`: rows present on only one side are kept, with NA in
# the columns coming from the other side (e.g. mapping-only places get an NA
# date and NA confirmed_cases). `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
# NOTE(review): `place` repeats in `mapping`, so matching case rows are
# duplicated by this join -- confirm that is intended before summing cases.
data <- merge(case_data, mapping, by = "place", all = TRUE)
str(data)
## 'data.frame': 71739 obs. of 12 variables:
## $ place : chr "- Under Investigation" "Acton" "Acton" "Acton" ...
## $ date : Date, format: NA "2020-06-13" ...
## $ county : Factor w/ 49 levels "Alameda","Amador",..: NA 16 16 16 16 16 16 16 16 16 ...
## $ fips : int NA 37 37 37 37 37 37 37 37 37 ...
## $ confirmed_cases: int NA 16 73 20 8 33 70 45 78 11 ...
## $ note : Factor w/ 9 levels "","1 to 4","1 to 5",..: NA 1 1 1 1 1 1 1 1 1 ...
## $ x : num NA -118 -118 -118 -118 ...
## $ y : num NA 34.5 34.5 34.5 34.5 ...
## $ City : Factor w/ 377 levels "","Acton","Adams-Normandie",..: 1 2 2 2 2 2 2 2 2 2 ...
## $ SPA : Factor w/ 12 levels "","1","2","3",..: 12 2 2 2 2 2 2 2 2 2 ...
## $ SPA.Name : Factor w/ 11 levels "","Antelope Valley",..: 10 2 2 2 2 2 2 2 2 2 ...
## $ City.Name : Factor w/ 344 levels "","Acton","Adams-Normandie",..: 290 2 2 2 2 2 2 2 2 2 ...
#Create daily table total
# One row per date: confirmed_cases summed over every row of `data` for that date.
# NOTE(review): sum() is called without na.rm, so any NA in a date's rows makes
# that date's total NA -- confirm this is the desired behaviour.
summary.LA <- ungroup(
  summarise(
    group_by(data, date),
    confirmed_cases = sum(confirmed_cases)
  )
)
summary.LA
# Order chronologically, then compare every row with the one before it.
summary.LA <- mutate(
  arrange(summary.LA, date),
  # Time elapsed since the previous row (exposes gaps in the series)
  Diff_day = date - lag(date),
  # Change in confirmed_cases since the previous row
  new_cases = confirmed_cases - lag(confirmed_cases)
)
summary.LA
#Create daily table per City
# One row per (date, City.Name) pair holding the summed confirmed_cases.
summary.city <- ungroup(
  summarise(
    group_by(data, date, City.Name),
    confirmed_cases = sum(confirmed_cases)
  )
)
## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
summary.city
#Add number of new case per city
# lag() is evaluated inside each City.Name group, so every city is compared
# only with its own previous row once the rows are in date order.
summary.city <- summary.city %>%
  group_by(City.Name) %>%
  arrange(date) %>%
  mutate(
    # Time elapsed since the city's previous row (exposes gaps in the series)
    Diff_day = date - lag(date),
    # Change in confirmed_cases since the city's previous row
    new_cases = confirmed_cases - lag(confirmed_cases)
  ) %>%
  arrange(City.Name)
## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `City.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
summary.city
#forcats::fct_explicit_na
# Inner join back onto the mapping table to attach its remaining columns.
# NOTE(review): City.Name repeats in `mapping`, so each summary row is
# duplicated once per matching mapping row -- confirm this fan-out is intended.
summary.city <- merge(
  summary.city,
  mapping,
  by = "City.Name",
  all = FALSE
)
summary.city
#write_csv(summary.city, "summary_city.csv")
#Create daily table per SPA
# One row per (date, SPA, SPA.Name) holding the summed confirmed_cases.
summary.SPA <- data %>%
  group_by(date, SPA, SPA.Name) %>%
  summarise(confirmed_cases = sum(confirmed_cases)) %>%
  ungroup()
## Warning: Factor `SPA` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `SPA.Name` contains implicit NA, consider using
## `forcats::fct_explicit_na`
summary.SPA
# Day-over-day differences must be computed within each SPA. Without grouping,
# lag() would compare the first row of one SPA with the last row of the
# previous SPA and produce meaningless Diff_day / new_cases values there.
# Grouping uses the same keys as the summary above; the first row of every
# series gets NA for both differences.
summary.SPA <- summary.SPA %>%
  arrange(SPA, date) %>% # sort by SPA, then chronologically within each SPA
  group_by(SPA, SPA.Name) %>%
  mutate(
    # Time elapsed since the SPA's previous row (exposes gaps in the series)
    Diff_day = date - lag(date),
    # Change in confirmed_cases since the SPA's previous row
    new_cases = confirmed_cases - lag(confirmed_cases)
  ) %>%
  ungroup() # rows are already ordered by SPA, so no further arrange() is needed
summary.SPA
#write_csv(summary.SPA, "Summary_SPA.csv")
# County-wide line chart of confirmed_cases by date, rendered interactively.
LA.plot <- ggplot(summary.LA, aes(x = date, y = confirmed_cases)) +
  geom_line()
ggplotly(LA.plot)
# One line per SPA, restricted to the SPAs labelled 1 through 8.
SPA.plot <- summary.SPA %>%
  subset(SPA %in% 1:8) %>%
  ggplot(aes(x = date, y = confirmed_cases, color = SPA)) +
  geom_line()
ggplotly(SPA.plot)
# One line per city for the two selected cities.
City.plot <- summary.city %>%
  subset(City %in% c("Woodland Hills", "Sierra Madre")) %>%
  ggplot(aes(x = date, y = confirmed_cases, color = City)) +
  geom_line()
ggplotly(City.plot)
# {-}
#write_csv(data, "data.csv")